*Preparation for performance analysis


*******************************************************************
* PREPARE DATA
*******************************************************************
clear
* Load the firm-level evolution data.
use "${input_stata}\evol_siren.dta"
* Suffix the headcount loaded here with _en; a different eff_3112 is renamed
* to eff_3112_gr (group level) further down in this file.
rename eff_3112 eff_3112_en

* Merge balance sheet variables. Master-only and matched rows are kept,
* using-only rows are discarded (same as: keep if _merge == 1 | _merge == 3).
* The original keep line ended with a stray "/" (a truncated "//" comment),
* which Stata rejects as an incomplete expression.
merge m:1 siren year using "${input_stata}\lf_light", keep(master match) nogenerate

* Convert sector 'code' to a string and pad with a leading zero for consistency.
* NOTE(review): only ONE zero is prepended, so a one-character code becomes two
* characters, not three - confirm that codes always have at least two digits.
tostring code, gen(code_entry)
replace code_entry = "0" + code_entry if length(code_entry) < 3

***MERGING ADDITIONAL DATASETS
* Swap the names of year and year_entry so that the key called "year" in the
* merges below carries the value of year_entry. The swap is undone at the end
* of this section.
rename (year year_entry) (year_entry year)

* Identify 'build' and 'buy' transactions. Every row is retained after both
* merges; the update option on the second merge lets list_buy fill values
* that are still missing in memory.
merge m:1 sirtg year code_entry using "${input_stata}\list_build.dta", nogenerate
merge m:1 sirtg year code_entry using "${input_stata}\list_buy.dta", update nogenerate

* Human capital (HC) overlap at t-1: only matched observations survive.
merge m:1 sirtg year code using "${input_stata}\\marche_f_HC_t1_s_brut", keep(match) nogenerate

* Distance = 1 - overlap, for both overlap measures.
generate HC_distance_t1_s_brut = 1 - HC_overlap_t1_s_brut
generate HC_dist_var = 1 - HC_overlap

* Undo the swap: year and year_entry get their original names back.
rename (year year_entry) (year_entry year)

* Tightness measured in the year before entry (t-1), plus a two-group split
* of it computed within apgr_1 x code x year cells.
preserve
    * Rows from the year preceding entry with a non-missing tightness value.
    keep if (year == year_entry - 1) & tightness != .
    gquantiles q_tightness = tightness, nq(2) xtile by(apgr_1 code year)
    rename tightness tightness_t1
    keep sirtg siren year_entry code_entry q_tightness tightness_t1
    save "${input_stata}\tightness_t1.dta", replace
restore

* Attach the t-1 values to every row of the same firm / group / entry.
merge m:1 sirtg siren year_entry code_entry using "${input_stata}\tightness_t1.dta", nogenerate

* Merging control variables at group level. Using-only rows are discarded
* (same as: drop if _merge == 2). The original drop line ended with a stray
* "/" (a truncated "//" comment), which is a syntax error.
merge m:1 sirtg year year_entry code using "${input_stata}\ctrl_var_evol.dta", keep(master match) nogenerate
rename eff_3112 eff_3112_gr // Rename for group level

**** ORGANIZATIONAL CAPITAL
merge m:1 sirtg year year_entry using "${input_stata}\orga_capi.dta", keep(master match) nogenerate

* Cumulate the categories: mkting becomes admin + mkting, then info becomes
* (admin + mkting) + info. A missing component makes the sum missing.
replace mkting = admin + mkting
replace info = mkting + info
* Dummy for HR presence: 1 when hr is strictly positive and non-missing,
* 0 otherwise (including when hr is missing).
gen dum_hr = hr > 0 & hr < .

*** ALTERNATIVE PERFORMANCE METRICS
gen vaj_eff_siren = vaj_en / eff_3112_en // Value added per employee
gen ebit_eff_siren = ebit_en / eff_3112_en // Earnings before interest and taxes per employee
gen roa_siren = ebit_en / tactibt_en // Return on assets
gen oi_sales = ebit_en / ca_en // Operating income to sales ratio

* Winsorize at the 1st and 99th percentiles: values below p1 are set to p1,
* values above p99 are set to p99, and missing values STAY missing.
* Stata treats missing as larger than any number, so the original
* "replace ... if x > p99" overwrote every missing ratio with p99. The
* explicit !missing() guard prevents that.
tempname p_low p_high
foreach x of varlist vaj_eff_siren ebit_eff_siren roa_siren oi_sales {
    su `x', d
    scalar `p_low' = r(p1)
    scalar `p_high' = r(p99)
    gen `x'_w = min(max(`x', `p_low'), `p_high') if !missing(`x')
}

* Compare raw and winsorized distributions.
summ vaj_eff_siren ebit_eff_siren roa_siren oi_sales
summ vaj_eff_siren_w ebit_eff_siren_w roa_siren_w oi_sales_w

***** IDENTIFIERS
* Event time: number of years since entry (negative before entry).
generate delta = year - year_entry
keep if delta != .

* Numeric versions of the string identifiers. With the force option,
* non-numeric sirtg values become missing instead of stopping the script.
destring sirtg, generate(sirtg_num) force
destring code_entry, generate(code_entry_num)

* Group identifiers: one value per distinct combination of the listed variables.
egen orig_dest = group(apgr_1 code_entry)
egen orig_dest_year_e = group(apgr_1 code_entry year_entry)
egen id = group(siren sirtg year_entry code_entry)

* Event time shifted by 4, so that delta = -4..4 maps to delta_b = 0..8.
generate delta_b = delta + 4

******* CONTROL VARIABLES FOR PANEL DATA
xtset id delta_b // Define panel data structure (needed for the L. operator below)

gen log_eff = log(eff_3112_gr)
gen vaj_eff = vaj / eff_3112_gr
gen diversity = nb_pcs / eff_3112_gr
gen size = log(L.eff_3112_gr) // log of the previous period's group headcount
gen immo_eff = immocorp / (eff_3112_gr * 1000)
gen sal_eff = sum_s_brut / eff_3112_gr
gen tresact_eff = tresact / (eff_3112_gr * 1000)

* Trim outliers: set to missing any value further than 3 x (p95 - p5) from the
* median. (This is the p5-p95 spread, not the interquartile range.)
* Cutoffs are held in temporary scalars instead of three full-length variables
* named p50/p95/p5, which would fail if such variables already existed and
* stored the cutoffs at float precision.
tempname center spread
foreach v of varlist s001 vaj_eff diversity size immo_eff sal_eff tresact_eff {
    su `v', d
    scalar `center' = r(p50)
    scalar `spread' = 3 * (r(p95) - r(p5))
    * Missing compares as larger than any number; the !missing() guard keeps
    * already-missing values (including extended missings) untouched.
    replace `v' = . if `v' < `center' - `spread' | (`v' > `center' + `spread' & !missing(`v'))
}

* Taken after trimming, so trimmed sales values give a missing log.
gen log_sales = log(s001)

* Control variables as observed in the year before entry (t-1).
preserve
    keep if year == year_entry - 1
    keep sirtg siren year_entry code_entry vaj_eff diversity size immo_eff sal_eff tresact_eff
    * Tag each control with the _t1 suffix.
    foreach c in vaj_eff diversity size immo_eff sal_eff tresact_eff {
        rename `c' `c'_t1
    }
    save "${output_stata}\ctrl_var_t1_gp.dta", replace
restore

* Attach the t-1 controls to every row of the same firm / group / entry.
merge m:1 sirtg siren year_entry code_entry using "${output_stata}\ctrl_var_t1_gp.dta", nogenerate

****** FOCUS ON DELTA RANGE -4 TO +4
keep if inrange(delta, -4, 4)

* EXPORTS for different perf variables
* Sales in wide format: one row per id, columns s001_0 ... s001_8 (delta_b = 0..8).
preserve
    keep id delta_b s001
    rename s001 s001_
    * Use the renamed stub explicitly: after the rename no variable called s001
    * exists, so "reshape wide s001" only worked through variable abbreviation.
    reshape wide s001_, i(id) j(delta_b)

    * Periods without an observation are coded as zero sales.
    forvalues l = 0/8 {
        replace s001_`l' = 0 if s001_`l' == .
    }
    save "${output_stata}\sales_dyn.dta", replace
restore

* Bring the wide sales columns back into memory, as is done for the other
* *_dyn files. s001_4 is used further down to compute the entrant's share,
* and the original script never merged this file back.
capture drop _merge
merge m:1 id using "${output_stata}\sales_dyn.dta"
capture drop _merge

* Firm headcount in wide format: one row per id, columns eff_0 ... eff_8.
preserve
    keep id delta_b eff_3112_en
    * Full variable names instead of the abbreviation "eff": the original
    * rename/reshape relied on variable abbreviation and would fail under
    * "set varabbrev off".
    rename eff_3112_en eff_
    reshape wide eff_, i(id) j(delta_b)

    * Periods without an observation are coded as zero headcount.
    forvalues l = 0/8 {
        replace eff_`l' = 0 if eff_`l' == .
    }
    save "${output_stata}\eff_dyn.dta", replace
restore

* Attach the wide headcount columns to every row of the same id.
capture drop _merge
merge m:1 id using "${output_stata}\eff_dyn.dta"
capture drop _merge

* HC distance in wide format: one row per id, columns HC_dist_var_0 ... HC_dist_var_8.
preserve
    keep id delta_b HC_dist_var
    rename HC_dist_var HC_dist_var_
    reshape wide HC_dist_var_, i(id) j(delta_b)

    * Periods without an observation are coded as zero.
    * The original loop body wrote HC_dist_var_l' without the opening backtick,
    * so the loop macro was never expanded and the line was a syntax error.
    forvalues l = 0/8 {
        replace HC_dist_var_`l' = 0 if HC_dist_var_`l' == .
    }

    save "${output_stata}\HC_dist_var.dta", replace
restore

* Attach the wide HC distance columns to every row of the same id.
capture drop _merge
merge m:1 id using "${output_stata}\HC_dist_var.dta"
capture drop _merge

* Top5 headcount ("Effectif Top5") in wide format: columns top5_0 ... top5_8.
preserve
    keep id delta_b top5
    rename top5 top5_
    * Explicit stub: after the rename, "top5" is only an abbreviation of top5_.
    reshape wide top5_, i(id) j(delta_b)

    save "${output_stata}\top5_dyn.dta", replace
restore

* Same for not_top5: columns not_top5_0 ... not_top5_8.
preserve
    keep id delta_b not_top5
    rename not_top5 not_top5_
    reshape wide not_top5_, i(id) j(delta_b)

    save "${output_stata}\not_top5_dyn.dta", replace
restore

* Attach both sets of wide columns to every row of the same id.
capture drop _merge
merge m:1 id using "${output_stata}\top5_dyn.dta"
capture drop _merge

capture drop _merge
merge m:1 id using "${output_stata}\not_top5_dyn.dta"
capture drop _merge

* Periods without an observation are coded as zero.
* The original loop wrote top5_l' / not_top5_l' without the opening backtick,
* so the loop macro was never expanded and both lines were syntax errors.
forvalues l = 0/8 {
    replace top5_`l' = 0 if top5_`l' == .
    replace not_top5_`l' = 0 if not_top5_`l' == .
}

*Add employment data
* The merge below uses a numeric key called "code", so the string code_entry
* and the original code are dropped and code_entry_num temporarily takes the
* name "code". siren is converted to numeric in place (non-numeric values
* become missing because of the force option).
drop code_entry
drop code
rename code_entry_num code
destring siren, replace force
merge m:1 sirtg year_entry siren code using "${input_stata}\employment_group.dta", keep(1 3) nogen
rename code code_entry_num

*One entering firm per group
* NOTE(review): s001_4 (sales at delta_b == 4) must already be in memory here;
* it is the wide sales column written to sales_dyn.dta earlier in this file.
gen share = s001_4/l1_cagr
keep if (share > 0.01 & share < .) | entree == "external"
* Count rows per group x entry year x sector x year. code_entry was dropped
* above, so the key is spelled code_entry_num explicitly rather than relying
* on "code_entry" being resolved as an abbreviation of it.
bys sirtg year_entry code_entry_num year : gen dup = _N
su dup, d
* Where several rows remain, keep only the "external" ones.
drop if dup > 1 & entree != "external"
drop dup

* Create dummy variable for HC distances
* At this point the variables code and code_entry no longer exist (both were
* dropped before the employment merge); only code_entry_num remains. The
* original by(), keep and merge keys resolved "code" / "code_entry" through
* variable abbreviation; the full name is used here so the block does not
* depend on that.
preserve
    keep if year == year_entry - 1
    * Two-group split of the t-1 HC distance within apgr_1 x sector x entry-year cells.
    gquantiles q_HC = HC_distance_t1_s_brut, nq(2) xtile by(apgr_1 code_entry_num year_entry)
    keep sirtg siren year_entry code_entry_num q_HC
    save "${input_stata}\HC_dummy.dta", replace
restore

merge m:1 sirtg siren year_entry code_entry_num using "${input_stata}\HC_dummy.dta"
drop _merge
* Dummy for top HC quantile.
* NOTE(review): rows with a missing q_HC get 0 here, not missing - confirm intended.
gen q_HC2 = q_HC == 2

* Create interaction variables for regression analysis: one column per
* observed value of delta_b, equal to the base variable in that period and 0
* in every other period.
* delta_b does not change between the two loops, so its levels are read once.
levelsof delta_b, local(levels)

* Top-HC-quantile dummy x period.
foreach l of local levels {
    gen q_HC2_`l' = q_HC2 * (delta_b == `l')
}

* Continuous HC distance x period. The full variable name is used: no variable
* is literally called HC_distance_t1, so the original only worked as an
* abbreviation of HC_distance_t1_s_brut.
foreach l of local levels {
    gen HC_distance_`l' = HC_distance_t1_s_brut * (delta_b == `l')
}

save "${output_stata}\reg_perf.dta", replace
